from bs4 import BeautifulSoup
import sys
# Make the sibling "kernel" and "model" directories importable so that
# spider_main and job_model resolve. Raw strings keep the backslash literal:
# "\k" and "\m" are not valid escape sequences, and newer Python versions
# warn about them at compile time. The resulting path values are unchanged.
# NOTE(review): these are relative to the current working directory and the
# backslash is only a path separator on Windows.
sys.path.append(r'..\kernel')
sys.path.append(r'..\model')
import spider_main
import job_model
# Scraping controller for the Lagou home page.
class CityController(object):
    """Scrape the menu of the Lagou home page and hand it to the job model.

    Despite the class name, nothing here deals with cities: ``index`` walks
    every ``div.menu_box`` on the page and, for each ``<dl>`` inside it,
    records the box heading, the ``<dt>`` title and the link texts.
    """

    def __init__(self):
        # Instantiate the core objects: the spider whose downloader fetches
        # the page, and the model that receives the rows through add_job().
        self.spider = spider_main.SpiderMain()
        self.model = job_model.JobModel()

    @staticmethod
    def _first_nonblank(strings):
        """Return the first item of *strings* that is not whitespace-only.

        The item is returned unchanged (surrounding whitespace is kept).
        Returns ``None`` when every item is blank or *strings* is empty.
        """
        return next((text for text in strings if text.split()), None)

    def index(self):
        """Download the Lagou home page and store every menu group found.

        For each ``div.menu_box`` the stripped ``<h2>`` text is used as the
        category. For each ``<dl>`` inside the box, the first non-blank
        string of its ``<dt>`` and the list of ``<a>`` texts are passed to
        ``self.model.add_job`` and printed.

        Boxes without a readable ``<h2>``, groups without a readable
        ``<dt>``, and anchors without a single text are skipped instead of
        aborting the whole run with ``AttributeError`` / ``IndexError``.
        """
        url = 'http://www.lagou.com/'

        # Download the page.
        html_cont = self.spider.downloader.downloader(url)
        html_obj = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')

        for box in html_obj.find_all('div', class_='menu_box'):
            heading = box.find('h2')
            # Tag.string is None unless the tag boils down to one string.
            if heading is None or heading.string is None:
                continue
            job_vocation = heading.string.strip()

            for menu in box.find_all('dl'):
                title_node = menu.find('dt')
                if title_node is None:
                    continue
                dt = self._first_nonblank(title_node.strings)
                if dt is None:
                    continue

                # NOTE(review): str.strip('其它') removes any leading/trailing
                # '其' or '它' characters (not the substring, and not
                # whitespace). Kept as-is; confirm this is the intent.
                dd = [
                    a.string.strip('其它')
                    for a in menu.find_all('a')
                    if a.string is not None
                ]
                self.model.add_job(job_vocation, dt, dd)
                print(job_vocation, dt, dd)
if __name__ == '__main__':
    # Executed as a script: build the controller and run one scrape.
    CityController().index()